# -*- coding: utf-8 -*-
'''
Created on Jul 6, 2014

@author: LONG HOANG GIANG
'''
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))
import lib
import json

def crawl(first_page=1, last_page=299, output_path='/longhoanggiang/xkcn.data'):
    '''
    Collect photo URLs from the paginated xkcn.info grid and save them.

    Every page from first_page to last_page (both inclusive) is loaded via
    lib.Web.load(..., cached=True); the non-empty 'data-photo-high'
    attribute of each matching grid item is collected. The page number and
    the URLs found on it are printed as progress output.

    The result is a list with one entry per page, in page order, each entry
    being the list of URLs found on that page (possibly empty). It is
    serialized with json.dumps, passed through lib.encryptCipher and handed
    to lib.gz_file_put_content together with output_path.

    @param first_page: number of the first page to fetch (default 1)
    @param last_page: number of the last page to fetch, inclusive
                      (default 299, i.e. the original range(1, 300))
    @param output_path: destination passed to lib.gz_file_put_content
    @return: None
    '''
    urlfm = 'http://xkcn.info/page/{0}'
    # The class fragment (including its double space) is matched verbatim by
    # contains(); presumably it mirrors the site's markup -- do not normalize.
    item_xpath = "//div[@id='grid']/div[contains(@class, 'gridItem gridphoto  id-')]"

    data = []

    for page in range(first_page, last_page + 1):
        tree = lib.Web.load(urlfm.format(page), cached=True).build_tree()
        candidates = (node.get('data-photo-high', '') for node in tree.xpath(item_xpath))
        # Grid items without a high-resolution photo attribute are skipped.
        idata = [image_url for image_url in candidates if image_url != '']
        # Pages without any match still contribute an (empty) entry so that
        # the position in `data` keeps corresponding to the page order.
        data.append(idata)
        print('>Page: %s' % page)
        for image in idata:
            print(image)
    lib.gz_file_put_content(lib.encryptCipher(json.dumps(data)), output_path)
            
if __name__ == '__main__':
    # Script entry point: run the full crawl, report completion, then
    # terminate the process.
    crawl()

    print('> Finished')
    # os._exit() terminates immediately without flushing stdio buffers, so
    # flush explicitly or the output above can be lost when stdout is piped
    # or redirected.
    sys.stdout.flush()
    # Exit status 0: reaching this point means the crawl completed without an
    # exception (the previous status 1 signalled failure to the caller).
    # NOTE(review): os._exit is kept instead of sys.exit, presumably to force
    # termination regardless of anything `lib` leaves running -- confirm.
    os._exit(0)